This is the Notebook of the project of the group composed by:
Name - codice persona - matricola
Massimo Pavan 10528264 945302
Francesco Peressini 10523034 928060
import lxml.etree as etree
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff
import chart_studio.plotly as py
import plotly.graph_objs as go
import chart_studio
import pandas as pd
import plotly.io as pio
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Setting user, API key and access token.
# NOTE(review): secrets should not live in source control. Each value can now
# be supplied through an environment variable; the literals are kept only as a
# backward-compatible fallback and should be revoked and removed.
import os

chart_studio.tools.set_credentials_file(
    username=os.environ.get('CHART_STUDIO_USERNAME', 'pavmassimo'),
    api_key=os.environ.get('CHART_STUDIO_API_KEY', 'YLSreh15YEIWWNq75Lxx'),
)
mapbox_access_token = os.environ.get(
    'MAPBOX_ACCESS_TOKEN',
    'pk.eyJ1IjoicGF2bWFzc2ltbyIsImEiOiJja2RzeGh4cHYxc2w1MnJ0YWczZndyZG50In0.GKQETZ2daXd-VrybJ571DQ',
)
We created the dataframe from the raw data using xpath to explore the .tcx files, handling the exceptions using try-except blocks to ensure robustness.
def _trackpoint_value(elem, path, ns, cast):
    """Return ``cast(text)`` for the child of *elem* found at *path*, else NaN.

    NaN is returned when the child element is missing, carries no text, or
    its text cannot be converted by *cast*.
    """
    node = elem.find(path, ns)
    if node is None or node.text is None:
        return np.nan
    try:
        return cast(node.text.strip())
    except ValueError:
        return np.nan


def df_import(path):
    """Parse a Garmin .tcx activity file into a per-trackpoint DataFrame.

    Parameters
    ----------
    path : path of the .tcx file handed to ``etree.parse``.

    Returns
    -------
    pandas.DataFrame with columns ``time`` (datetime), ``latitude``,
    ``longitude``, ``altitude``, ``distance``, ``heartRate`` and ``lap``
    (0-based position of the enclosing Lap element). Missing or unparsable
    measurements are stored as NaN; a missing timestamp becomes NaT.

    Raises
    ------
    IndexError if the document contains no Activity element.
    """
    xroot = etree.parse(path).getroot()
    # ns is the default namespace of the tree
    ns = {'d': 'http://www.garmin.com/xmlschemas/TrainingCenterDatabase/v2'}
    # Only the first Activity element is read.
    activity = xroot.findall(".//d:Activity", ns)[0]

    df_cols = ["time", "latitude", "longitude", "altitude", "distance", "heartRate", "lap"]
    rows = []
    for lapnum, lap in enumerate(activity.findall("./d:Lap", ns)):
        for elem in lap.findall("./d:Track/d:Trackpoint", ns):
            time_node = elem.find("./d:Time", ns)
            if time_node is not None and time_node.text:
                s_time = time_node.text.strip()
            else:
                s_time = None  # converted to NaT by pd.to_datetime below
            rows.append({
                "time": s_time,
                "latitude": _trackpoint_value(elem, "./d:Position/d:LatitudeDegrees", ns, float),
                "longitude": _trackpoint_value(elem, "./d:Position/d:LongitudeDegrees", ns, float),
                "altitude": _trackpoint_value(elem, "./d:AltitudeMeters", ns, float),
                "distance": _trackpoint_value(elem, "./d:DistanceMeters", ns, float),
                "heartRate": _trackpoint_value(elem, "./d:HeartRateBpm/d:Value", ns, int),
                "lap": lapnum,
            })

    out_df = pd.DataFrame(rows, columns=df_cols)
    out_df['time'] = pd.to_datetime(out_df.time)
    # The original called out_df.set_index('time') and discarded the result,
    # which did nothing. The default integer index is kept on purpose: the
    # functions consuming this frame address rows as .loc[0], .loc[i], ...
    return out_df
We selected randomly some runs to work on.
# Load five activities from ./Data/activities into per-trackpoint DataFrames.
out_df = df_import('./Data/activities/264774936.tcx')
out_df2 =df_import('./Data/activities/1066731959.tcx')
out_df3 =df_import('./Data/activities/540424210.tcx')
out_df4 =df_import('./Data/activities/1065891372.tcx')
out_df5 = df_import('./Data/activities/750144106.tcx')
# Notebook display of the first run.
out_df
As stated in the "working notebook", we noticed that the gps data are too noisy to use them without any kind of normalization.
We now try to resort to the sliding windows approach with the pace calculated as min/km. To calculate this value, similarly to what we did before, we set a window of 60 seconds, so to enrich each data point of the original dataset with the average speed in the 60 seconds before the considered point.
In the first 60 seconds, the size of the window is set to the number of seconds elapsed since the beginning, and the distance is calculated from the first point.
If the distance covered between two points is zero, for example if the runner stopped for a while, the pace is set to NaN, because otherwise it would go to infinity. The same is done for very slow paces (10 min/km or slower), in order not to introduce noise in the visualizations.
def df_normalize(out_df, window_size, max_pace=10):
    """Add sliding-window ``speed`` (km/h) and ``pace min/km`` columns.

    Every record is assumed to be one second after the previous one and the
    frame is assumed to be in chronological order, so row position doubles as
    elapsed seconds. For row ``i`` the window spans the previous
    ``window_size`` rows; during the first ``window_size`` rows it spans
    everything since row 0.

    Parameters
    ----------
    out_df : DataFrame with a ``distance`` column in meters.
    window_size : window length in rows (seconds); must be >= 1.
    max_pace : paces at or above this value (min/km) are stored as NaN.

    Returns
    -------
    A copy of *out_df* with the two extra columns. ``speed`` of the first
    row is 0. ``pace min/km`` is NaN for the first row, whenever the distance
    covered in the window is not positive, and whenever the pace reaches
    *max_pace*. Both rules now apply to every row; previously the cutoff was
    only enforced inside the first window.

    Raises
    ------
    ValueError if *window_size* is smaller than 1.
    """
    w = int(window_size)
    if w < 1:
        raise ValueError("window_size must be at least 1")

    out_df_norm = out_df.copy()
    n_rows = len(out_df_norm)
    distance = out_df_norm['distance'].to_numpy(dtype=float)

    position = np.arange(n_rows)
    # Clamping at row 0 shortens the window at the start of the run and also
    # makes frames shorter than the window work.
    window_start = np.maximum(position - w, 0)
    elapsed = (position - window_start).astype(float)  # seconds in the window
    covered = distance - distance[window_start]        # meters in the window

    # Row 0 divides 0 by 0 and stationary windows divide by 0; both cases are
    # overwritten below, so the floating-point warnings are silenced.
    with np.errstate(divide='ignore', invalid='ignore'):
        speed = covered / elapsed * 3.6  # conversion to km/h
        pace = (elapsed / 60) / (covered / 1000)
        usable = (covered > 0) & (pace < max_pace)
    if n_rows:
        speed[0] = 0
    pace[~usable] = np.nan

    out_df_norm['speed'] = speed
    out_df_norm['pace min/km'] = pace
    return out_df_norm
# Enrich each loaded run with speed and pace computed over a 60-row window.
out_df_norm = df_normalize(out_df, 60)
out_df_norm2 = df_normalize(out_df2, 60)
out_df_norm4 = df_normalize(out_df4, 60)
out_df_norm3 = df_normalize(out_df3, 60)
out_df_norm5 = df_normalize(out_df5, 60)
# Notebook display of the first normalized run.
out_df_norm
We now try to build a dataset by discretizing in blocks of about a fixed distance the original dataset (out_df). We keep the distance and time values as they were in the out_df, we calculate the heartRate as the average of all the values inside the block. Finally we calculate the speed as:
(time_at_end_of_block - time_at_start_of_block)/ (distance_at_end_of_block - distance_at_start_of_block)
and use some conversion values (from second to minute and from meters to km).
A drawback of this normalization method is that we "lose" many data points, which get summarized, resulting in lower accuracy. Furthermore, not all blocks are composed of the same number of data points from the original dataset.
def df_discretize(out_df, discretization_size):
    """Summarize a run into blocks of roughly ``discretization_size`` meters.

    Rows are scanned in order (one record per second is assumed, so the row
    position is stored as ``time``). A block is closed at the first row whose
    distance reaches the next multiple of *discretization_size*; a final row
    is appended for the last, incomplete block.

    Returns a DataFrame with columns ``pace min/km``, ``distance``, ``time``,
    ``heartRate`` (mean of the rows collected since the previous block) and
    ``gainedAltitude`` (altitude difference from the previous block).
    """
    summary = pd.DataFrame(columns=['pace min/km', 'distance', 'time', 'heartRate', 'gainedAltitude'])
    n_rows = len(out_df)

    block_idx = 0                         # number of blocks emitted so far
    prev_alt = out_df.loc[0, 'altitude']  # altitude at the previous block boundary
    # Running heart-rate sum and count, seeded with the first record so the
    # very first block has something to average.
    hr_total = out_df.loc[0, 'heartRate']
    hr_count = 1

    for row in range(n_rows):
        here = out_df.loc[row, 'distance']
        if here >= block_idx * discretization_size:
            summary.loc[block_idx, 'distance'] = here
            summary.loc[block_idx, 'time'] = row
            current_alt = out_df.loc[row, 'altitude']
            summary.loc[block_idx, 'gainedAltitude'] = current_alt - prev_alt
            prev_alt = current_alt
            summary.loc[block_idx, 'heartRate'] = hr_total / hr_count
            hr_total, hr_count = 0, 0
            block_idx += 1
        hr_total = hr_total + out_df.loc[row, 'heartRate']
        hr_count += 1

    # Trailing, non-completed block: measured at the last record.
    last = n_rows - 1
    summary.loc[block_idx, 'distance'] = out_df.loc[last, 'distance']
    summary.loc[block_idx, 'time'] = last
    summary.loc[block_idx, 'gainedAltitude'] = out_df.loc[last, 'altitude'] - prev_alt
    summary.loc[block_idx, 'heartRate'] = hr_total / hr_count

    # Pace of a block = minutes elapsed / kilometres covered since the previous one.
    for k in range(1, len(summary)):
        minutes = (summary.loc[k, 'time'] - summary.loc[k - 1, 'time']) / 60
        kilometres = (summary.loc[k, 'distance'] - summary.loc[k - 1, 'distance']) / 1000
        summary.loc[k, 'pace min/km'] = minutes / kilometres
    return summary
# Summarize the first two raw runs in blocks of about 1000 m.
speed_df = df_discretize(out_df, 1000)
speed_df2 = df_discretize(out_df2, 1000)
# Notebook display of the first 40 blocks.
speed_df.head(40)
Here we try to "smooth" the discretized df with a new sliding window approach. In the earlier normalization of the dataset we used a window centered "in the past" (each point was calculated as the average of the previous X points), under the assumption that the heartRate was somehow dependent on the pace of the latest X seconds. Now, since the goal is to obtain a smoothing to better visualize the information in the "line path" visualization, we opted for a symmetric window instead, centered on the point.
At the beginning and at the end of the dataset, the window size is set to "as large as it can be" (in the first point is always 1, in the second 3...)
def df_smooth(discretized_df, smoothing_window):
    """Smooth ``heartRate`` and ``pace min/km`` with a centered moving average.

    For the row labelled ``x`` the average covers rows ``x - h`` .. ``x + h``
    where ``h`` is ``int(smoothing_window / 2)`` shrunk so the window never
    leaves the frame (1 value at the first/last row, 3 at the second, ...).
    The sum is always divided by the number of values actually added
    (``2*h + 1``); the original divided interior rows by ``smoothing_window``,
    which is wrong for even windows, and raised ``KeyError`` on frames
    shorter than the window.

    Parameters
    ----------
    discretized_df : DataFrame labelled 0..n-1 with columns ``pace min/km``,
        ``distance``, ``time`` and ``heartRate``.
    smoothing_window : nominal window width in rows.

    Returns
    -------
    DataFrame with columns ``pace min/km``, ``distance``, ``time``,
    ``heartRate``; ``distance`` and ``time`` are copied unchanged.
    """
    n_rows = len(discretized_df)
    max_half = int(smoothing_window / 2)

    speed_df = pd.DataFrame(columns=['pace min/km', 'distance', 'time', 'heartRate'])
    speed_df['distance'] = discretized_df['distance']
    speed_df['time'] = discretized_df['time']

    for x in range(n_rows):
        # Largest symmetric half-width that fits on both sides of x.
        half = max(0, min(max_half, x, n_rows - 1 - x))
        labels = range(x - half, x + half + 1)
        for column in ('heartRate', 'pace min/km'):
            # Plain addition, so a NaN inside the window yields NaN.
            total = sum(discretized_df.loc[label, column] for label in labels)
            speed_df.loc[x, column] = total / len(labels)
    return speed_df
# Discretize the first normalized run in blocks of about 100 m, drop the first
# block, relabel from 0 and smooth with a nominal window of 11 blocks.
smooth_df = df_smooth(df_discretize(out_df_norm, 100).iloc[1:].reset_index(drop=True), 11)
# Notebook display of the smoothed frame.
smooth_df
#viz_line_training(smooth_df)
import os

# Import and normalize (60-row window) every entry of the activities folder.
# NOTE(review): os.listdir returns entries in arbitrary order, while later
# code picks runs by position in df_list - confirm the ordering is stable.
activities_dir = './Data/activities/'
name_list = os.listdir(activities_dir)
df_list = []
for name in name_list:
    run_df = df_import(activities_dir + name)
    df_list.append(df_normalize(run_df, 60))

# One frame with the records of all runs, relabelled from 0.
all_df = pd.concat(df_list).reset_index(drop=True)
all_df
This is a particular manipulation of the dataset, that is used to build the heatbar visualization. The data are here summarized in function of the pace and the heartRate, which have been discretized in step of fixed dimension.
The percentage column contains the frequency of each point as a percentage of the sum of the frequencies of all the points in the same discrete pace step (or bin), while the percentage_of_max column contains the frequency as a percentage of the maximum frequency registered for a data point in the bin.
def df_bpm_pace(df1):
    """Count records per (pace bin, heart rate) cell for the heat-bar plot.

    Pace is binned in 20 steps of 0.5 min/km covering [0, 10); heart rate is
    matched exactly against the integers 0..209. Records outside those ranges
    (or with NaN pace / non-integer heart rate) are not counted.

    Parameters
    ----------
    df1 : DataFrame with ``pace min/km`` and ``heartRate`` columns.

    Returns
    -------
    DataFrame with 20 * 210 rows, ordered by pace bin then heart rate, with
    columns ``pace min/km`` (lower edge of the bin), ``heartRate``,
    ``percentage`` (count as % of the bin total) and ``percentage_of_max``
    (count as % of the bin maximum). Empty bins get the uniform value
    ``(1/210) * 100`` in both percentage columns.
    """
    n_pace_bins, n_bpm = 20, 210

    def _percent_of(cell_counts, denominators):
        # Row-wise cell_counts / denominators * 100; uniform where the
        # denominator is zero (empty bin).
        shares = np.full(cell_counts.shape, (1 / cell_counts.shape[1]) * 100, dtype=float)
        filled = denominators > 0
        shares[filled] = cell_counts[filled] / denominators[filled][:, None] * 100
        return shares

    pace = df1["pace min/km"].to_numpy(dtype=float)
    bpm = df1["heartRate"].to_numpy(dtype=float)

    # Dividing by 0.5 is exact in floating point, so floor(pace / 0.5) == k is
    # the same test as k/2 <= pace < k/2 + 0.5 used by the original filters.
    with np.errstate(invalid='ignore'):
        pace_bin = np.floor(pace / 0.5)
        countable = (
            (pace_bin >= 0) & (pace_bin < n_pace_bins)
            & (bpm >= 0) & (bpm < n_bpm) & (bpm == np.floor(bpm))
        )

    counts = np.zeros((n_pace_bins, n_bpm), dtype=np.int64)
    np.add.at(counts, (pace_bin[countable].astype(int), bpm[countable].astype(int)), 1)

    bpm_pace_df = pd.DataFrame(columns=['pace min/km', 'heartRate', 'percentage', 'percentage_of_max'])
    bpm_pace_df['pace min/km'] = np.repeat(np.arange(n_pace_bins) / 2, n_bpm)
    bpm_pace_df['heartRate'] = np.tile(np.arange(n_bpm), n_pace_bins)
    bpm_pace_df['percentage'] = _percent_of(counts, counts.sum(axis=1)).ravel()
    bpm_pace_df['percentage_of_max'] = _percent_of(counts, counts.max(axis=1)).ravel()
    return bpm_pace_df
#print(pace_step_array)
# Build the pace-binned heart-rate distribution over all runs and show its summary.
bpm_pace_df = df_bpm_pace(all_df)
bpm_pace_df.describe()
def df_bpm_pace_dual(df1):
    """Count records per (heart-rate step, pace bin) cell.

    Dual of ``df_bpm_pace``: heart rate is binned in 21 steps of 10 bpm
    covering [0, 210) and, inside each step, pace is binned in 100 steps
    ``[k/10, k/10 + 0.1)`` for k in 0..99.

    Parameters
    ----------
    df1 : DataFrame with ``pace min/km`` and ``heartRate`` columns.

    Returns
    -------
    DataFrame with 21 * 100 rows, ordered by heart-rate step then pace bin,
    with columns ``pace min/km`` (lower edge), ``heartRate`` (lower edge),
    ``percentage_of_max`` and ``percentage``. Empty heart-rate steps get the
    uniform value ``(1/100) * 100`` in both percentage columns; the original
    used 1/210 for ``percentage``, a leftover from the 210-cell variant.
    """
    n_bpm_bins, bpm_width, n_pace_bins = 21, 10, 100

    def _percent_of(cell_counts, denominators):
        # Row-wise cell_counts / denominators * 100; uniform where the
        # denominator is zero (empty step).
        shares = np.full(cell_counts.shape, (1 / cell_counts.shape[1]) * 100, dtype=float)
        filled = denominators > 0
        shares[filled] = cell_counts[filled] / denominators[filled][:, None] * 100
        return shares

    pace = df1["pace min/km"].to_numpy(dtype=float)
    bpm = df1["heartRate"].to_numpy(dtype=float)

    # Bin edges are computed exactly as the original comparisons did
    # (k/10 and k/10 + 0.1), so floating-point edge cases are unchanged.
    pace_lo = np.arange(n_pace_bins) / 10
    pace_hi = pace_lo + 0.1

    counts = np.zeros((n_bpm_bins, n_pace_bins), dtype=np.int64)
    with np.errstate(invalid='ignore'):
        for step in range(n_bpm_bins):
            lower = step * bpm_width
            step_pace = pace[(bpm >= lower) & (bpm < lower + bpm_width)]
            if step_pace.size:
                in_bin = (step_pace[:, None] >= pace_lo) & (step_pace[:, None] < pace_hi)
                counts[step] = in_bin.sum(axis=0)

    bpm_pace_df = pd.DataFrame(columns=['pace min/km', 'heartRate', 'percentage_of_max', 'percentage'])
    bpm_pace_df['pace min/km'] = np.tile(pace_lo, n_bpm_bins)
    bpm_pace_df['heartRate'] = np.repeat(np.arange(n_bpm_bins) * bpm_width, n_pace_bins)
    bpm_pace_df['percentage'] = _percent_of(counts, counts.sum(axis=1)).ravel()
    bpm_pace_df['percentage_of_max'] = _percent_of(counts, counts.max(axis=1)).ravel()
    return bpm_pace_df
#print(pace_step_array)
# Rebuild bpm_pace_df with the heart-rate-binned variant and show its summary.
bpm_pace_df = df_bpm_pace_dual(all_df)
bpm_pace_df.describe()
# Runs offered for comparison in the interactive plots, with their labels
# (the two lists are index-aligned).
runs_array = [
    out_df_norm3,
    df_list[22],
    df_list[14],
    df_list[16],
    df_list[20],
]
names_array = [
    "fast interval training",
    "moderate interval training",
    "slow run",
    "race",
    "progressive training",
]
The first visualization is a simple map of the run.
We made it using the "Mapbox" api for plotly.
There is some kind of interaction for displaying the single kms and the mouse hover provides additional information on the specific data point of the run.
# --- Map of the run: one scattermapbox trace per kilometre ------------------
# /1000 because distance is registered in meters
n_km_traces = int(max(out_df_norm['distance']) / 1000) + 1
kms = list(range(n_km_traces))
data1 = []
x = 0
y = 1
for km in kms:
    # Records whose distance falls inside kilometre `km`.
    in_km = (out_df_norm['distance'] < (km + 1) * 1000) & (out_df_norm['distance'] >= (km * 1000))
    km_data = {
        'lat': out_df_norm.loc[in_km, 'latitude'],
        'lon': out_df_norm.loc[in_km, 'longitude'],
        #text = distance + "<br>"+ customdata,
        'customdata': out_df_norm.loc[in_km, ['distance', 'heartRate', 'pace min/km']],
        'hovertemplate': (
            'distance: %{customdata[0]}'
            + '<br>BPM: %{customdata[1]}'
            + '<br>Pace: %{customdata[2]:.2f} min/km'
        ),
        'name': km,
        'marker': {'size': 8, 'opacity': 0.8},
        'type': 'scattermapbox',
    }
    data1.append(km_data)

# --- Layout ------------------------------------------------------------------
layout = {
    'height': 800,
    # top, bottom, left and right margins
    'margin': {'t': 0, 'b': 0, 'l': 0, 'r': 0},
    'font': {'color': '#FFFFFF', 'size': 11},
    'paper_bgcolor': '#000000',
    'mapbox': {
        # here you need the token from Mapbox
        'accesstoken': mapbox_access_token,
        'bearing': 0,
        # the map is centered on the first record of the run
        'center': {
            'lat': out_df_norm['latitude'][0],
            'lon': out_df_norm['longitude'][0],
        },
        # the map is "parallel" to the screen, with no angle
        'pitch': 0,
        # default level of zoom
        'zoom': 15,
        # default map style
        'style': 'dark',
    },
}

# --- Kilometre selector --------------------------------------------------------
# n kilometre traces give n+1 options: the first shows all of them, each of
# the others shows a single kilometre.
visibility = [False] * n_km_traces
visibility_all = [True] * n_km_traces
buttons = [
    {'label': 'All kms', 'method': 'update', 'args': [{'visible': visibility_all}]},
]
for item in range(n_km_traces):
    visibility_partial = visibility.copy()
    visibility_partial[item] = True
    buttons.append(
        {'label': item, 'method': 'update', 'args': [{'visible': visibility_partial}]}
    )

# drop-down 2: select km to visualize
viz = {
    'buttons': buttons,
    # direction where the drop-down expands when opened
    'direction': 'down',
    # position
    'x': 0.01,
    'xanchor': 'left',
    'y': 0.99,
    'yanchor': 'bottom',
    # fonts and border
    'bgcolor': '#000000',
    'bordercolor': '#FFFFFF',
    'font': {'size': 11},
}

# --- Map style selector --------------------------------------------------------
# drop-down 1: one button per alternative map style
style_buttons = [
    {'args': ['mapbox.style', style_id], 'label': style_label, 'method': 'relayout'}
    for style_id, style_label in (
        ('dark', 'Dark'),
        ('light', 'Light'),
        ('outdoors', 'Outdoors'),
        ('satellite-streets', 'Satellite with Streets'),
    )
]
updatemenus = [
    {
        'buttons': style_buttons,
        # direction where the menu expands when clicked
        'direction': 'up',
        # position of this drop-down on the map
        'x': 0.75,
        'xanchor': 'left',
        'y': 0.05,
        'yanchor': 'bottom',
        # font size and colors
        'bgcolor': '#000000',
        'bordercolor': '#FFFFFF',
        'font': {'size': 11},
    },
    viz,
]

# assign the list of dictionaries to the layout dictionary
layout['updatemenus'] = updatemenus
figure = {'data': data1, 'layout': layout}
#fig.show()
#py.iplot(figure, filename = 'kms')
We then worked to display some aggregated statistics of a single run.
We first thought of displaying only the values on a per-km basis, and then we enriched the visualization by making three different vector heatmaps, one for each statistic that we considered interesting to visualize.
For what regards the colors we opted for using the viridis color palette "plasma" for displaying Pace and BPM statistics, which is a standard choice for heatmaps and it is also colorblind safe. For displaying the meters gained, we used a diverging color palette with blue and red as main colors which is still a colorblind safe solution.
def viz_stats(speed_df):
    """Show three single-column annotated heatmaps of per-block statistics.

    The first row of *speed_df* is skipped. One figure each is shown, in this
    order, for ``pace min/km`` (two decimals, reversed color scale),
    ``heartRate`` (rounded to an integer) and ``gainedAltitude`` (two
    decimals, diverging RdBu scale centered on 0).
    """

    def _single_column(column, parse):
        # Values of `column` from the second row on, one per heatmap row.
        series = speed_df[column].iloc[1:].reset_index(drop=True).apply(parse)
        return [[value] for value in series]

    def _render(cells, label, **heatmap_options):
        figure = ff.create_annotated_heatmap(
            z=cells, y=list(range(len(cells))), x=[label], **heatmap_options
        )
        figure.update_yaxes(autorange="reversed", title="KM")
        figure.show()
        #py.plot(fig)

    def two_decimals(value):
        return float("%.2f" % value)

    def whole_number(value):
        return int("%.0f" % value)

    #create pace plot
    _render(_single_column('pace min/km', two_decimals), 'Min/km', reversescale=True)

    #create heart rate plot
    _render(_single_column('heartRate', whole_number), 'BPM')

    #create gained altitude plot
    _render(
        _single_column('gainedAltitude', two_decimals),
        'Meters Gained',
        colorscale=px.colors.diverging.RdBu,
        zmid=0,
        font_colors=['white', 'black'],
    )
# Show the per-block statistics heatmaps for the first discretized run.
viz_stats(speed_df)
With this visualization we started comparing the original run (run A) with some other runs (run 0, run 1, run 2).
In the dashboard we can imagine that these latest runs are the last x runs from the same runner or from another one.
We compared them using a lineplot displaying the pace vs the distance. We used again a blue-red color encoding to ensure colorblind safeness.
We used the distance instead of time to follow the standards of the main available running applications.
In this and the following visualizations the mouse hover are used to give more precise information on the single point.
def viz_line(out_df, df_array, names_df):
    """Plot pace against distance for one run versus a selectable other run.

    *out_df* is always drawn (blue, "LAST RUN"). Every frame of *df_array* is
    added as a red trace named after the matching entry of *names_df*; only
    the first one starts visible and a drop-down switches between them.
    """
    hover = 'Pace: %{y:.2f} min/km' + '<br>Distance: %{x:.0f} m<br>'

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=out_df.distance,
        y=out_df['pace min/km'],
        name="LAST RUN",
        line=dict(color='#4575b4'),
        opacity=0.8,
        hovertemplate=hover,
    ))
    for position, other_run in enumerate(df_array):
        fig.add_trace(go.Scatter(
            x=other_run.distance,
            y=other_run['pace min/km'],
            name=names_df[position],
            line=dict(color='#d73027'),
            opacity=0.8,
            hovertemplate=hover,
            visible=(position == 0),
        ))

    # Trace 0 is the reference run; trace k+1 is comparison run k.
    n_runs = len(df_array)
    buttons = []
    for k in range(n_runs):
        shown = [True] + [False] * n_runs
        shown[k + 1] = True
        buttons.append({
            'label': names_df[k],
            'method': "update",
            'args': [{"visible": shown}, {"title": "LAST RUN vs:"}],
        })

    fig.update_layout(
        updatemenus=[dict(active=0, x=0.5, y=1.2, buttons=list(buttons))],
    )
    fig.update_layout(
        plot_bgcolor='rgb(255, 255, 255)',
        yaxis=go.layout.YAxis(title='pace in min/km', showgrid=True, gridcolor='LightGrey'),
        xaxis=go.layout.XAxis(title='meters', showgrid=False),
        hovermode='closest',
        title_text="LAST RUN vs:",
    )
    fig.show()
    #return py.plot(fig)
#runs_array_cropped = runs_array[:3]
# Compare the first normalized run with the selectable comparison runs.
viz_line(out_df_norm, runs_array, names_array)

# The same raw run normalized with four window sizes (seconds).
df_array_varlen = [df_normalize(out_df, seconds) for seconds in (15, 30, 60, 120)]
def viz_line_varlen(df_array):
    """Plot pace against distance for one run normalized with several windows.

    Frame number k of *df_array* is labelled ``15 * 2**k`` seconds. Only the
    first trace starts visible; a drop-down shows one trace at a time and
    updates the title accordingly.
    """

    def window_label(k):
        return "window size: " + str(15 * (2 ** k)) + " seconds"

    hover = 'Pace: %{y:.2f} min/km' + '<br>Distance: %{x:.0f} m<br>'
    n_frames = len(df_array)

    fig = go.Figure()
    for k, frame in enumerate(df_array):
        fig.add_trace(go.Scatter(
            x=frame.distance,
            y=frame['pace min/km'],
            name=window_label(k),
            line=dict(color='#d73027'),
            opacity=0.8,
            hovertemplate=hover,
            visible=(k == 0),
        ))

    buttons = []
    for k in range(n_frames):
        shown = [False] * n_frames
        shown[k] = True
        buttons.append({
            'label': window_label(k),
            'method': "update",
            'args': [{"visible": shown}, {"title": window_label(k)}],
        })

    fig.update_layout(
        updatemenus=[dict(active=0, x=0.7, y=1.2, buttons=list(buttons))],
    )
    fig.update_layout(
        plot_bgcolor='rgb(255, 255, 255)',
        yaxis=go.layout.YAxis(title='pace in min/km', showgrid=True, gridcolor='LightGrey'),
        xaxis=go.layout.XAxis(title='meters', showgrid=False),
        hovermode='closest',
        title_text="window size: 15 seconds",
    )
    fig.show()
    #return py.plot(fig)
# Show the pace line plot of the first run for each normalization window.
viz_line_varlen(df_array_varlen)
To display the differences between the efforts of run A and the others, we chose to juxtapose two heatmaps in which the BPM are plotted against the pace. This choice lets the user easily compare the "effort areas" in which the two runs are concentrated in the BPM vs PACE space.
The selected color palette is again "plasma" for the same reasons as before.
The second heatmap allows to choose which run to compare with a dropdown menu.
from plotly.subplots import make_subplots
def viz_heatmap(out_df_norm):
    """Show and export a BPM-vs-pace density heatmap with the raw points on top.

    A 2D histogram contour of ``heartRate`` against ``pace min/km`` is drawn
    with the individual records overlaid as faint white markers. The figure
    is written to ``./density_with_points.png`` and then displayed.

    The original assigned a ``go.Layout`` to ``fig.update_layout``, which
    replaced the method on the figure instead of calling it, so the
    1920x1080 size was never applied; it is now passed to ``update_layout``.
    """
    y = out_df_norm['heartRate']
    x = out_df_norm['pace min/km']

    fig = go.Figure()
    fig.add_trace(go.Histogram2dContour(
        x=x, y=y,
        contours=dict(coloring='heatmap'),
        colorbar=dict(title="# records"),
    ))
    fig.add_trace(go.Scatter(
        x=x, y=y,
        mode='markers',
        marker=dict(color='white', size=3, opacity=0.05),
        hovertemplate='Pace: %{x:.2f} min/km' + '<br>BPM: %{y:.0f} <br>',
    ))
    fig.update_layout(
        yaxis=go.layout.YAxis(range=[130, 195], title='BPM', showgrid=False),
        xaxis=go.layout.XAxis(range=[3, 8], title='Pace in min/km', showgrid=False),
    )
    # Background matches the lowest color of the heatmap scale.
    fig.update_layout(plot_bgcolor='rgb(13, 8, 135)')
    fig.update_layout(autosize=True, width=1920, height=1080)
    pio.write_image(fig, './density_with_points.png')
    fig.show()
    #return py.plot(fig)
# Density heatmap (with points) of the first normalized run.
viz_heatmap(out_df_norm)
def viz_heatmap_interactive(df_array, names_df):
    """Show BPM-vs-pace density heatmaps for several runs behind a drop-down.

    Each frame of *df_array* contributes two traces (density contour and raw
    points). Only the pair of the first frame starts visible; each drop-down
    entry, labelled from *names_df*, shows the pair of one frame.
    """
    hover = 'Pace: %{x:.2f} min/km' + '<br>BPM: %{y:.0f} <br>'

    fig = go.Figure()
    for position, run in enumerate(df_array):
        shown = position == 0
        bpm = run['heartRate']
        pace = run['pace min/km']
        fig.add_trace(go.Histogram2dContour(
            x=pace, y=bpm,
            contours=dict(coloring='heatmap'),
            colorbar=dict(title="# records"),
            visible=shown,
        ))
        fig.add_trace(go.Scatter(
            x=pace, y=bpm,
            mode='markers',
            marker=dict(color='white', size=3, opacity=0.05),
            visible=shown,
            hovertemplate=hover,
        ))

    fig.update_layout(
        yaxis=go.layout.YAxis(range=[130, 195], title='BPM', showgrid=False),
        xaxis=go.layout.XAxis(range=[3, 8], title='Pace in min/km', showgrid=False),
    )
    fig.update_layout(plot_bgcolor='rgb(13, 8, 135)')

    # Run k owns traces 2k (contour) and 2k+1 (points).
    n_runs = len(df_array)
    buttons = []
    for k in range(n_runs):
        flags = [False] * (2 * n_runs)
        flags[2 * k] = True
        flags[2 * k + 1] = True
        buttons.append({
            'label': names_df[k],
            'method': "update",
            'args': [{"visible": flags}],
        })

    fig.update_layout(
        updatemenus=[dict(active=0, x=0.8, y=1.2, buttons=list(buttons))],
    )
    fig.show()
    #py.plot(fig)
#runs_array_cropped = runs_array[:3]
# Selectable density heatmaps for the comparison runs.
viz_heatmap_interactive(runs_array, names_array)
# same visualization, without displaying all the datapoints
def viz_heatmap2(out_df_norm):
    """Show and export a BPM-vs-pace density heatmap without the raw points.

    A 2D histogram contour of ``heartRate`` against ``pace min/km`` is
    written to ``./fig2.svg`` and then displayed.

    The original assigned a ``go.Layout`` to ``fig.update_layout``, which
    replaced the method on the figure instead of calling it, so the
    1920x1080 size was never applied; it is now passed to ``update_layout``.
    """
    y = out_df_norm['heartRate']
    x = out_df_norm['pace min/km']

    fig = go.Figure()
    fig.add_trace(go.Histogram2dContour(
        x=x, y=y,
        contours=dict(coloring='heatmap'),
        colorbar=dict(title="# records"),
    ))
    fig.update_layout(
        yaxis=go.layout.YAxis(range=[130, 195], title='BPM', showgrid=False),
        xaxis=go.layout.XAxis(range=[3, 8], title='Pace in min/km', showgrid=False),
    )
    # Background matches the lowest color of the heatmap scale.
    fig.update_layout(plot_bgcolor='rgb(13, 8, 135)')
    fig.update_layout(autosize=True, width=1920, height=1080)
    pio.write_image(fig, './fig2.svg', format='svg')
    fig.show()
    #return py.plot(fig)
# Density heatmap (without points) over the records of all runs.
viz_heatmap2(all_df)
#displaying the same run with different windows size
def viz_heatmap_varlen(df_array):
    """Show BPM-vs-pace density heatmaps of one run for several window sizes.

    Each frame of *df_array* contributes two traces (density contour and raw
    points); frame number k is labelled ``15 * 2**k`` seconds. Only the pair
    of the first frame starts visible; the drop-down shows one pair at a time
    and updates the title.
    """

    def window_label(k):
        return "window size: " + str(15 * (2 ** k)) + " seconds"

    hover = 'Pace: %{x:.2f} min/km' + '<br>BPM: %{y:.0f} <br>'

    fig = go.Figure()
    for position, frame in enumerate(df_array):
        shown = position == 0
        bpm = frame['heartRate']
        pace = frame['pace min/km']
        fig.add_trace(go.Histogram2dContour(
            x=pace, y=bpm,
            contours=dict(coloring='heatmap'),
            colorbar=dict(title="# records"),
            visible=shown,
        ))
        fig.add_trace(go.Scatter(
            x=pace, y=bpm,
            mode='markers',
            marker=dict(color='white', size=3, opacity=0.05),
            visible=shown,
            hovertemplate=hover,
        ))

    fig.update_layout(
        yaxis=go.layout.YAxis(range=[130, 195], title='BPM', showgrid=False),
        xaxis=go.layout.XAxis(range=[3, 8], title='Pace in min/km', showgrid=False),
    )
    fig.update_layout(plot_bgcolor='rgb(13, 8, 135)')

    # Frame k owns traces 2k (contour) and 2k+1 (points).
    n_frames = len(df_array)
    buttons = []
    for k in range(n_frames):
        flags = [False] * (2 * n_frames)
        flags[2 * k] = True
        flags[2 * k + 1] = True
        buttons.append({
            'label': window_label(k),
            'method': "update",
            'args': [{"visible": flags}, {"title": window_label(k)}],
        })

    fig.update_layout(
        updatemenus=[dict(active=0, x=0.7, y=1.2, buttons=list(buttons))],
    )
    fig.update_layout(title_text="window size: 15 seconds")
    fig.show()
    #py.plot(fig)
# Density heatmaps of the first run for each normalization window.
viz_heatmap_varlen(df_array_varlen)
We then started exploring two alternative visualizations.
In both of them we discretized the pace space in order to reduce complexity; by using bar plots instead of scatter plots, we ensure that the discriminability and separability properties of the plots are preserved.
In the first alternative the plot is really useful to visualize the distribution of the BPM over the pace in the two single run, since it makes it easier to compare the two runs as a whole.
def _piramid_avg_hr_per_pace_bin(df, first_edge=6):
    """Average the heart rate of *df* over consecutive 0.5 min/km pace bins.

    Rows are sorted by "pace min/km". Bin k collects the rows whose pace is
    below ``(first_edge + k) / 2`` and not below the previous edge (the first
    bin has no lower bound).

    Returns a pair ``(avg_hr, count)`` of equally long lists; a bin without
    rows reports an average of 0 and a count of 0.
    """
    ordered = df.sort_values("pace min/km").reset_index(drop=True)
    avg_hr = []
    count = []
    edge = first_edge
    hr_sum = 0
    n_rows = 0
    for pace, heart_rate in zip(ordered["pace min/km"], ordered["heartRate"]):
        # Sorting puts NaN last, so the first NaN ends the usable data.
        # An infinite pace would never satisfy the bin-closing loop below,
        # so it is treated as the end of the data as well.
        if np.isnan(pace) or pace == float("inf"):
            break
        # Close every bin whose upper edge the current pace has reached.
        while pace >= edge / 2:
            avg_hr.append(hr_sum / n_rows if n_rows else 0)
            count.append(n_rows)
            hr_sum = 0
            n_rows = 0
            edge += 1
        hr_sum += heart_rate
        n_rows += 1
    # Flush the bin that is still open; the guard covers a frame with no
    # usable rows, which would otherwise divide by zero.
    avg_hr.append(hr_sum / n_rows if n_rows else 0)
    count.append(n_rows)
    return avg_hr, count


def viz_piramid(df1, df2):
    """Draw a pyramid bar chart comparing the average BPM per pace bin of two runs.

    Args:
        df1: data frame of RUN A; needs the columns "pace min/km" and
            "heartRate". Drawn on the positive side of the BPM axis.
        df2: data frame of RUN B, same columns. Drawn mirrored on the
            negative side of the BPM axis.

    The bars are coloured by the number of records in each bin. The figure is
    shown with ``fig.show()``; nothing is returned.
    """
    avg_hr, count = _piramid_avg_hr_per_pace_bin(df1)
    avg_hr2, count2 = _piramid_avg_hr_per_pace_bin(df2)
    # Mirror run B onto the negative half of the x axis.
    avg_hr2 = [value * (-1) for value in avg_hr2]

    # Upper edges of the pace bins: 3.0, 3.5, ..., 9.5 min/km.
    y = [edge / 2 for edge in range(6, 20)]
    # Labels are derived from the edges so that every tick has exactly one
    # label (a hand-written list is easy to leave one entry short).
    pace_labels = ["{:.1f}-{:.1f}".format(upper - 0.5, upper) for upper in y]

    shared_cmax = max(count + count2)
    hover = 'BPM: %{x:.0f} <br>' + 'count: %{text:.0f} <br>'

    fig = go.Figure()
    fig.update_layout(title='BPM per pace in min/km comparison',
                      plot_bgcolor='rgba(0, 0, 0, 0)',
                      paper_bgcolor='rgba(0, 0, 0, 0)',
                      )
    fig.update_layout(
        yaxis=go.layout.YAxis(
            range=[3, 10],
            tickvals=y,
            ticktext=pace_labels,
            title='pace in min/km',
            showgrid=False),
        xaxis=go.layout.XAxis(
            range=[-220, 220],
            tickvals=[-200, -150, -100, -50, 0, 50, 100, 150, 200],
            # Both halves show absolute BPM values.
            ticktext=[200, 150, 100, 50, 0, 50, 100, 150, 200],
            title='BPM',
            showgrid=True, gridcolor='LightGrey'),
        barmode='overlay',
        bargap=0.1,
        showlegend=False)

    fig.add_trace(go.Bar(
        y=y,
        x=avg_hr,
        orientation='h',
        name='RUN A',
        text=np.array(count).astype('int'),
        marker=dict(color=count, cmin=3, cmax=shared_cmax,
                    colorscale="Blues", colorbar=dict(title="."),),
        hovertemplate=hover))
    fig.add_trace(go.Bar(
        y=y,
        x=avg_hr2,
        orientation='h',
        name='RUN B',
        text=np.array(count2).astype('int'),
        marker=dict(color=count2, cmin=3, cmax=shared_cmax,
                    colorscale="Reds",
                    colorbar=dict(title="number of records", x=1, y=0.5,
                                  showticklabels=False),),
        hovertemplate=hover))

    # Run names below the plot; the legend is hidden.
    fig.update_layout(annotations=[
        dict(xref='paper', yref='paper', x=0.6, y=-0.1,
             xanchor='left', yanchor='top',
             text=' <b>RUN A',
             font=dict(size=23, color='royalblue'),
             showarrow=False),
        dict(xref='paper', yref='paper', x=0.25, y=-0.1,
             xanchor='left', yanchor='top',
             text=' <b>RUN B',
             font=dict(size=23, color='red'),
             showarrow=False),
    ])
    fig.show()
    # To publish on Chart Studio instead: return py.plot(fig)
# Pyramid chart with out_df_norm as RUN A and out_df_norm2 as RUN B.
viz_piramid(out_df_norm, out_df_norm2)
In the second alternative we used a grouped bar plot to visualize the two runs.
This visualization is useful to compare the effort in the same pace segment of two runs.
In the end we chose to keep this visualization instead of the other one because we thought that a runner would find it more useful for comparing his/her improvements in terms of BPM in a specific pace segment. Furthermore, we thought that this visualization leads to a more precise comparison.
def _bar_avg_hr_per_pace_bin(df, first_edge=2):
    """Average the heart rate of *df* over consecutive 0.5 min/km pace bins.

    Rows are sorted by "pace min/km". Bin k collects the rows whose pace is
    below ``(first_edge + k) / 2`` and not below the previous edge (the first
    bin has no lower bound).

    Returns a pair ``(avg_hr, count)`` of equally long lists; a bin without
    rows reports an average of 0 and a count of 0.
    """
    ordered = df.sort_values("pace min/km").reset_index(drop=True)
    avg_hr = []
    count = []
    edge = first_edge
    hr_sum = 0
    n_rows = 0
    for pace, heart_rate in zip(ordered["pace min/km"], ordered["heartRate"]):
        # Sorting puts NaN last, so the first NaN ends the usable data.
        # An infinite pace would never satisfy the bin-closing loop below,
        # so it is treated as the end of the data as well.
        if np.isnan(pace) or pace == float("inf"):
            break
        # Close every bin whose upper edge the current pace has reached.
        while pace >= edge / 2:
            avg_hr.append(hr_sum / n_rows if n_rows else 0)
            count.append(n_rows)
            hr_sum = 0
            n_rows = 0
            edge += 1
        hr_sum += heart_rate
        n_rows += 1
    # Flush the bin that is still open; the guard covers a frame with no
    # usable rows, which would otherwise divide by zero.
    avg_hr.append(hr_sum / n_rows if n_rows else 0)
    count.append(n_rows)
    return avg_hr, count


def viz_bar_interactive(df1, array_df, names_df):
    """Show the average BPM per pace bin of a base run beside one selectable run.

    Args:
        df1: data frame of the base run; needs the columns "pace min/km" and
            "heartRate". Its trace is always visible.
        array_df: list of data frames (same columns) to compare against.
            Only one of them is visible at a time; the first one initially.
        names_df: dropdown labels, one per entry of ``array_df``.

    Bars are coloured by the number of records in each bin. The figure is
    shown with ``fig.show()``; nothing is returned.
    """
    avg_hr, count = _bar_avg_hr_per_pace_bin(df1)
    avg_hr_array = []
    count_array = []
    for df2 in array_df:
        avg_hr2, count2 = _bar_avg_hr_per_pace_bin(df2)
        avg_hr_array.append(avg_hr2)
        count_array.append(count2)

    # One colour ceiling shared by all traces, taken over every run so that it
    # works for any number of comparison runs.
    shared_cmax = max(count + [c for counts in count_array for c in counts])

    # Upper edges of the pace bins: 1.0, 1.5, ..., 9.5 min/km.
    x = [edge / 2 for edge in range(2, 20)]
    hover = 'BPM: %{y:.0f} <br>' + 'count: %{text:.0f} <br>'

    fig = go.Figure()
    fig.update_layout(plot_bgcolor='rgb(255, 255, 255)')
    fig.update_layout(
        xaxis=go.layout.XAxis(
            range=[3.5, 10],
            tickvals=[2, 2.5, 3, 3.5, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 8.5, 9, 9.5],
            ticktext=["1.5-2.0", "2.0-2.5", "2.5-3.0", "3.0-3.5", "3.5-4.0", "4.0-4.5", "4.5-5.0", "5.0-5.5", "5.5-6.0", "6.0-6.5", "6.5-7.0", "7.0-7.5", "7.5-8.0", "8.0-8.5", "8.5-9.0", "9.0-9.5", ">9.5"],
            title='pace in min/km',
            showgrid=False),
        yaxis=go.layout.YAxis(
            range=[0, 220],
            tickvals=[0, 50, 100, 150, 200],
            ticktext=[0, 50, 100, 150, 200],
            title='Average BPM',
            showgrid=True, gridcolor='LightGrey'),
        # Base and comparison bars stand side by side within each pace bin.
        barmode='group',
        bargap=0.2,
        showlegend=False)

    fig.add_trace(go.Bar(
        x=x,
        y=avg_hr,
        name='RUN Base',
        text=np.array(count).astype('int'),
        hoverinfo='text',
        marker=dict(color=count, cmin=1, cmax=shared_cmax,
                    colorscale="Blues",
                    colorbar=dict(title=". last run", title_font_color="#4575b4"),),
        hovertemplate=hover))

    for i, (avg_hr_other, count_other) in enumerate(zip(avg_hr_array, count_array)):
        is_first = i == 0
        fig.add_trace(go.Bar(
            x=x,
            y=avg_hr_other,
            name='RUN ' + str(i),
            text=np.array(count_other).astype('int'),
            hoverinfo='text',
            marker=dict(color=count_other,
                        # NOTE(review): the first comparison run uses cmin=3
                        # while all others use cmin=1; kept as found, confirm
                        # whether the difference is intended.
                        cmin=3 if is_first else 1,
                        cmax=shared_cmax,
                        colorscale="Reds",
                        colorbar=dict(title="other ",
                                      title_font_color="#d73027", x=1, y=0.5,
                                      showticklabels=False)),
            # Only the first comparison run is visible until a button is used.
            visible=is_first,
            hovertemplate=hover))

    # One dropdown entry per comparison run: trace 0 (the base run) stays on,
    # and exactly one of the following traces is switched on.
    buttons = []
    for idx in range(len(array_df)):
        viz = [True] + [False] * len(array_df)
        viz[idx + 1] = True
        buttons.append(
            dict(label=names_df[idx],
                 method="update",
                 args=[{"visible": viz},
                       {"title": "LAST RUN vs:"}])
        )
    fig.update_layout(
        updatemenus=[
            dict(active=0,
                 x=0.5, y=1.2,
                 buttons=list(buttons),
                 )
        ])

    # Caption placed above the colour bars.
    fig.update_layout(annotations=[
        dict(xref='paper', yref='paper', x=1, y=1.05,
             xanchor='left', yanchor='top',
             text=' # records',
             font=dict(size=18),
             showarrow=False),
    ])
    fig.update_layout(title_text="LAST RUN vs:")
    fig.show()
    # To publish on Chart Studio instead: py.plot(fig)
# Compare out_df_norm (base run) against every run in runs_array.
# The two commented lines below are a disabled variant that crops the run list to entries 2..4.
#runs_array_cropped = runs_array[2:5]
#names_array_cropped = names_array[2:5]
viz_bar_interactive(out_df_norm, runs_array, names_array)
Here we visualize the run as a path in the BPM - PACE space. We tried various discretization sizes for displaying this data, optionally combined with a normalization in the form of the smoothing window described earlier.
def viz_line_training(out_df):
    """Plot one run as a path in the pace/BPM plane and mark its first point.

    Args:
        out_df: data frame with the columns "pace min/km", "heartRate",
            "distance" and "time"; rows are drawn in their stored order.

    The figure is shown with ``fig.show()``; nothing is returned.
    """
    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=out_df['pace min/km'],
        y=out_df['heartRate'],
        name="RUN A",
        line=dict(color='#4575b4'),
        opacity=0.8,
        customdata=out_df[["distance", "heartRate", "pace min/km", "time"]],
        hovertemplate=
        'distance: %{customdata[0]}' +
        '<br>BPM: %{customdata[1]:.1f}' +
        '<br>Pace: %{customdata[2]:.2f} min/km' +
        '<br>Seconds: %{customdata[3]} ',))

    # An annotation is anchored at a single point, so the 'start' label uses
    # the first row only (same as viz_line_training_array). An empty frame
    # has no first row and therefore gets no label.
    if len(out_df) > 0:
        fig.add_annotation(
            x=out_df['pace min/km'].iloc[0],  # arrow head
            y=out_df['heartRate'].iloc[0],  # arrow head
            xref='x',
            yref='y',
            text='start',
            showarrow=True,
            arrowhead=3,
            arrowsize=1,
            arrowwidth=1,
            arrowcolor='black'
        )

    fig.update_layout(plot_bgcolor='rgb(255, 255, 255)')
    fig.update_layout(yaxis=go.layout.YAxis(
        title='BPM',
        showgrid=True, gridcolor='LightGrey'),
        xaxis=go.layout.XAxis(
            title='pace in min/km',
            showgrid=False))
    fig.update_layout(hovermode='closest')
    fig.show()
    # To publish on Chart Studio instead: return py.plot(fig)
def viz_line_training_array(array_df, names_df):
    """Plot several runs as paths in the pace/BPM plane, one visible at a time.

    Args:
        array_df: list of data frames with the columns "pace min/km",
            "heartRate", "distance" and "time".
        names_df: trace names / dropdown labels, one per entry of ``array_df``.

    A dropdown switches the visible run and moves the 'start' marker to that
    run's first row. The figure is shown with ``fig.show()``; nothing is
    returned.
    """
    hover_text = (
        'distance: %{customdata[0]}'
        '<br>BPM: %{customdata[1]:.1f}'
        '<br>Pace: %{customdata[2]:.2f} min/km'
        '<br>Seconds: %{customdata[3]} '
    )

    def start_marker(run):
        # Arrow labelled 'start' whose head sits on the first row of the run.
        return dict(
            x=run['pace min/km'].iloc[0],
            y=run['heartRate'].iloc[0],
            ay=35,
            xref='x',
            yref='y',
            text='start',
            showarrow=True,
            arrowhead=3,
            arrowsize=1,
            arrowwidth=1,
            arrowcolor='black',
        )

    fig = go.Figure()
    for position, run in enumerate(array_df):
        fig.add_trace(go.Scatter(
            x=run['pace min/km'],
            y=run['heartRate'],
            name=names_df[position],
            line=dict(color='#4575b4'),
            opacity=0.8,
            customdata=run[["distance", "heartRate", "pace min/km", "time"]],
            hovertemplate=hover_text,
            # Only the first run is drawn until a dropdown entry is chosen.
            visible=(position == 0),
        ))

    # One single-element annotation list per run, installed by its button.
    annotations = [[start_marker(run)] for run in array_df]

    n_runs = len(array_df)
    buttons = []
    for position in range(n_runs):
        shown = [False] * n_runs
        shown[position] = True
        buttons.append(dict(
            label=names_df[position],
            method="update",
            args=[{"visible": shown},
                  {"annotations": annotations[position]}],
        ))

    fig.update_layout(
        updatemenus=[dict(active=0, x=0.8, y=1.2, buttons=list(buttons))]
    )
    fig.update_layout(plot_bgcolor='rgb(255, 255, 255)')
    fig.update_layout(
        yaxis=go.layout.YAxis(title='BPM', showgrid=True, gridcolor='LightGrey'),
        xaxis=go.layout.XAxis(title='pace in min/km', showgrid=False),
    )
    fig.update_layout(hovermode='closest')
    # Initial marker: the first run is the one visible on load.
    fig.add_annotation(**start_marker(array_df[0]))
    fig.show()
    # To publish on Chart Studio instead: return py.plot(fig)
# Every run discretized with df_discretize(..., 1000), first row dropped.
runs_array1 = []
for x in range(len(runs_array)):
    runs_array1.append(df_discretize(runs_array[x], 1000).iloc[1:])
viz_line_training_array(runs_array1, names_array)

# out_df_norm under several (discretization step, smoothing window) pairs.
smooth_settings = [
    (1000, 1), (1000, 3),
    (500, 3), (500, 5),
    (200, 7), (200, 9),
    (100, 9), (100, 11),
]
smooth_array = [
    df_smooth(df_discretize(out_df_norm, step).iloc[1:].reset_index(drop=True), window)
    for step, window in smooth_settings
]
smooth_names = [
    "avg v every " + str(step) + " m, smooth window: " + str(window)
    for step, window in smooth_settings
]
viz_line_training_array(smooth_array, smooth_names)

# Same run with the step fixed at 200 and only the smoothing window varying.
smooth_settings = [(200, window) for window in (1, 3, 5, 7, 9, 11)]
smooth_array = [
    df_smooth(df_discretize(out_df_norm, step).iloc[1:].reset_index(drop=True), window)
    for step, window in smooth_settings
]
smooth_names = [
    "avg v every " + str(step) + " m, smooth window: " + str(window)
    for step, window in smooth_settings
]
viz_line_training_array(smooth_array, smooth_names)
Here we display the distribution of the data points with regard to the discrete pace step (bin) they belong to. We have used the stacked bar chart template to do this, in an unconventional way. In order to give a complete and immediate comprehension of the visualization, we have used both the percentage column (shown on hover) and the percentage_of_max column (shown as color). This is because we want the user to be able to both A) immediately recognize the distribution in the specific bin, exploiting the full color palette for each of them; and B) understand precisely how many points (as a percentage) are summarized in each single stack of the visualization.
def heatbar_viz(bpm_pace_df):
    """Draw a BPM-by-pace heat map built from stacked unit-height bars.

    Args:
        bpm_pace_df: data frame with the columns "heartRate",
            "percentage_of_max" (bar colour) and "percentage" (hover text).
            Presumably it holds one row per (BPM, pace bin) pair in pace
            order -- TODO confirm against df_bpm_pace.

    One bar trace is added per BPM value 0..209, each one unit tall, so the
    stacking position of a cell equals its BPM. The figure is shown with
    ``fig.show()``; nothing is returned.
    """
    n_bins = 20
    # Pace bin positions: 0.0, 0.5, ..., 9.5 min/km.
    pace_positions = [half_step / 2 for half_step in range(n_bins)]
    hover_text = ('Percentage: %{customdata:.3f} % <br>'
                  'BPM: %{text:.0f} <br>')

    fig = go.Figure()
    colour_ceiling = max(bpm_pace_df["percentage_of_max"])
    for bpm in range(210):
        level_rows = bpm_pace_df[bpm_pace_df["heartRate"] == bpm]
        fig.add_trace(go.Bar(
            x=pace_positions,
            y=[1] * n_bins,
            name='',
            text=[bpm] * n_bins,
            customdata=level_rows["percentage"],
            marker=dict(
                color=level_rows["percentage_of_max"],
                line=dict(width=0),
                cmin=0,
                cmax=colour_ceiling,
                colorscale="viridis",
                colorbar=dict(title="% of max value",),
                showscale=False,
            ),
            hovertemplate=hover_text,
        ))

    pace_labels = ["0-0.5", "0.5-1", "1-1.5", "1.5-2", "2-2.5", "2.5-3", "3-3.5", "3.5-4", "4-4.5", "4.5-5", "5-5.5", "5.5-6", "6-6.5", "6.5-7", "7-7.5", "7.5-8", "8-8.5", "8.5-9", "9-9.5", ">9.5"]
    fig.update_layout(
        yaxis=go.layout.YAxis(range=[130, 195], title='BPM', showgrid=False),
        xaxis=go.layout.XAxis(
            range=[2.75, 8.25],
            tickvals=pace_positions,
            ticktext=pace_labels,
            title='Pace in min/km',
            showgrid=False),
    )
    fig.update_layout(barmode='stack')
    fig.update_layout(showlegend=False)
    fig.show()
    # To publish on Chart Studio instead: return py.plot(fig)
# Heat map for all_df first, then for out_df_norm; bpm_pace_df is rebound in between.
bpm_pace_df = df_bpm_pace(all_df)
heatbar_viz(bpm_pace_df)
bpm_pace_df = df_bpm_pace(out_df_norm)
heatbar_viz(bpm_pace_df)
def heatbar_viz_array(array_df, names_df):
    """Draw the BPM-by-pace heat map for several frames, one visible at a time.

    Args:
        array_df: list of data frames with the columns "heartRate",
            "percentage_of_max" (bar colour) and "percentage" (hover text).
        names_df: dropdown labels, one per entry of ``array_df``.

    Each frame contributes 210 stacked bar traces (BPM 0..209); the dropdown
    toggles whole groups of 210 traces. The figure is shown with
    ``fig.show()``; nothing is returned.
    """
    n_bins = 20
    n_levels = 210
    # Pace bin positions: 0.0, 0.5, ..., 9.5 min/km.
    pace_positions = [half_step / 2 for half_step in range(n_bins)]
    hover_text = ('Percentage: %{customdata:.3f} % <br>'
                  'BPM: %{text:.0f} <br>')

    fig = go.Figure()
    for frame_no, frame in enumerate(array_df):
        # Each frame is coloured against its own maximum.
        colour_ceiling = max(frame["percentage_of_max"])
        for bpm in range(n_levels):
            level_rows = frame[frame["heartRate"] == bpm]
            fig.add_trace(go.Bar(
                x=pace_positions,
                y=[1] * n_bins,
                name='',
                text=[bpm] * n_bins,
                customdata=level_rows["percentage"],
                marker=dict(
                    color=level_rows["percentage_of_max"],
                    line=dict(width=0),
                    cmin=0,
                    cmax=colour_ceiling,
                    colorscale="viridis",
                    colorbar=dict(title="% of records",),
                    showscale=False,
                ),
                hovertemplate=hover_text,
                # Only the first frame is drawn until a button is used.
                visible=(frame_no == 0),
            ))

    # Button k switches on traces [k * 210, (k + 1) * 210) and hides the rest.
    total_traces = n_levels * len(array_df)
    buttons = []
    for frame_no in range(len(array_df)):
        shown = [False] * total_traces
        first = frame_no * n_levels
        shown[first:first + n_levels] = [True] * n_levels
        buttons.append(dict(
            label=names_df[frame_no],
            method="update",
            args=[{"visible": shown},],
        ))
    fig.update_layout(
        updatemenus=[dict(active=0, x=0.8, y=1.2, buttons=list(buttons))]
    )

    pace_labels = ["0-0.5", "0.5-1", "1-1.5", "1.5-2", "2-2.5", "2.5-3", "3-3.5", "3.5-4", "4-4.5", "4.5-5", "5-5.5", "5.5-6", "6-6.5", "6.5-7", "7-7.5", "7.5-8", "8-8.5", "8.5-9", "9-9.5", ">9.5"]
    fig.update_layout(
        yaxis=go.layout.YAxis(range=[130, 195], title='BPM', showgrid=False),
        xaxis=go.layout.XAxis(
            range=[2.75, 8.25],
            tickvals=pace_positions,
            ticktext=pace_labels,
            title='Pace in min/km',
            showgrid=False),
    )
    fig.update_layout(barmode='stack')
    fig.update_layout(showlegend=False)
    fig.show()
    # To publish on Chart Studio instead: return py.plot(fig)
# Build one df_bpm_pace frame per run and show them in a single switchable figure.
bpm_pace_array = []
for df in runs_array:
    bpm_pace_array.append(df_bpm_pace(df))
#bpm_pace_array = bpm_pace_array[:2]
heatbar_viz_array(bpm_pace_array, names_array)
def heatbar_viz_dual(bpm_pace_df):
    """Draw the pace-by-BPM heat map (BPM bins on x, pace stacked on y).

    Args:
        bpm_pace_df: data frame with the columns "pace min/km",
            "percentage_of_max" (bar colour) and "percentage" (hover text).
            Rows are selected by exact equality of "pace min/km" with
            0.0, 0.1, ..., 9.9 -- presumably the frame stores paces on that
            grid; TODO confirm against df_bpm_pace_dual.

    One bar trace is added per pace step, each one unit tall, so a stacking
    height of 10 corresponds to 1 min/km. The figure is shown with
    ``fig.show()``; nothing is returned.
    """
    # Lower edges of the BPM bins: 0, 10, ..., 200.
    bpm_edges = list(range(0, 210, 10))
    n_bins = len(bpm_edges)
    hover_text = ('Percentage: %{customdata:.3f} % <br>'
                  'Pace: %{text:.1f} <br>')

    fig = go.Figure()
    colour_ceiling = max(bpm_pace_df["percentage_of_max"])
    for tenths in range(100):
        pace = tenths / 10
        pace_rows = bpm_pace_df[bpm_pace_df["pace min/km"] == pace]
        fig.add_trace(go.Bar(
            x=bpm_edges,
            y=[1] * n_bins,
            name='',
            text=[pace] * n_bins,
            customdata=pace_rows["percentage"],
            marker=dict(
                color=pace_rows["percentage_of_max"],
                line=dict(width=0),
                cmin=0,
                cmax=colour_ceiling,
                colorscale="viridis",
                colorbar=dict(title="% of records",),
                showscale=False,
            ),
            hovertemplate=hover_text,
        ))

    bpm_labels = [str(low) + "-" + str(low + 10) for low in bpm_edges]
    fig.update_layout(
        xaxis=go.layout.XAxis(
            range=[95, 195],
            title='BPM',
            tickvals=bpm_edges,
            ticktext=bpm_labels,
        ),
        yaxis=go.layout.YAxis(
            range=[30, 90],
            # Ten stacked traces per minute of pace.
            tickvals=list(range(0, 100, 10)),
            ticktext=[str(minute) for minute in range(10)],
            title='Pace in min/km',
        ),
    )
    fig.update_layout(barmode='stack')
    fig.update_layout(showlegend=False)
    fig.show()
    # To publish on Chart Studio instead: return py.plot(fig)
# Dual heat map for all_df first, then for out_df_norm; bpm_pace_df is rebound in between.
bpm_pace_df = df_bpm_pace_dual(all_df)
heatbar_viz_dual(bpm_pace_df)
bpm_pace_df = df_bpm_pace_dual(out_df_norm)
heatbar_viz_dual(bpm_pace_df)
def heatbar_viz_dual_array(array_df, names_array):
    """Draw the pace-by-BPM heat map for several frames, one visible at a time.

    Args:
        array_df: list of data frames with the columns "pace min/km",
            "percentage_of_max" (bar colour) and "percentage" (hover text).
            Rows are selected by exact equality of "pace min/km" with
            0.0, 0.1, ..., 9.9.
        names_array: dropdown labels, one per entry of ``array_df``.

    Each frame contributes 100 stacked bar traces; the dropdown toggles whole
    groups of 100 traces. The figure is shown with ``fig.show()``; nothing is
    returned.
    """
    n_steps = 100
    # Lower edges of the BPM bins: 0, 10, ..., 200.
    bpm_edges = list(range(0, 210, 10))
    n_bins = len(bpm_edges)
    hover_text = ('Percentage: %{customdata:.3f} % <br>'
                  'Pace: %{text:.1f} <br>')

    fig = go.Figure()
    for frame_no, frame in enumerate(array_df):
        # Each frame is coloured against its own maximum.
        colour_ceiling = max(frame["percentage_of_max"])
        for tenths in range(n_steps):
            pace = tenths / 10
            pace_rows = frame[frame["pace min/km"] == pace]
            fig.add_trace(go.Bar(
                x=bpm_edges,
                y=[1] * n_bins,
                name='',
                text=[pace] * n_bins,
                customdata=pace_rows["percentage"],
                marker=dict(
                    color=pace_rows["percentage_of_max"],
                    line=dict(width=0),
                    cmin=0,
                    cmax=colour_ceiling,
                    colorscale="viridis",
                    colorbar=dict(title="% of records",),
                    showscale=False,
                ),
                hovertemplate=hover_text,
                # Only the first frame is drawn until a button is used.
                visible=(frame_no == 0),
            ))

    # Button k switches on traces [k * 100, (k + 1) * 100) and hides the rest.
    total_traces = n_steps * len(array_df)
    buttons = []
    for frame_no in range(len(array_df)):
        shown = [False] * total_traces
        first = frame_no * n_steps
        shown[first:first + n_steps] = [True] * n_steps
        buttons.append(dict(
            label=names_array[frame_no],
            method="update",
            args=[{"visible": shown},],
        ))
    fig.update_layout(
        updatemenus=[dict(active=0, x=0.8, y=1.2, buttons=list(buttons))]
    )

    bpm_labels = [str(low) + "-" + str(low + 10) for low in bpm_edges]
    fig.update_layout(
        xaxis=go.layout.XAxis(
            range=[95, 195],
            title='BPM',
            tickvals=bpm_edges,
            ticktext=bpm_labels,
        ),
        yaxis=go.layout.YAxis(
            range=[30, 90],
            # Ten stacked traces per minute of pace.
            tickvals=list(range(0, 100, 10)),
            ticktext=[str(minute) for minute in range(10)],
            title='Pace in min/km',
        ),
    )
    fig.update_layout(barmode='stack')
    fig.update_layout(showlegend=False)
    fig.show()
    # To publish on Chart Studio instead: return py.plot(fig)
# Build one df_bpm_pace_dual frame per run and show them in a single switchable figure.
bpm_pace_array_dual = []
for df in runs_array:
    bpm_pace_array_dual.append(df_bpm_pace_dual(df))
heatbar_viz_dual_array(bpm_pace_array_dual, names_array)
In this part of the notebook we show the discarded "discrete" approach, that made use of the discretized datasets created as described in the DISCRETIZATION OF DATASET section.
This approach was discarded mainly because, as stated above:
"As a drawback of this method to normalize the speed there is the fact that we "lose" a lot of data point that gets summarized, resulting in less accuracy. Furthermore, not all the blocks are composed by the same number of data points from the original dataset."
This can be seen in the visualizations, which result in simpler and insufficiently detailed descriptions of the data.
# Discarded "discrete" approach: the same visualizations fed with df_discretize(..., 100) output.
discr_df = df_discretize(out_df, 100)
discr_array = [df_discretize(out_df2, 100)]
viz_line(discr_df, discr_array, ["discrete comparison"])
viz_heatmap(df_discretize(out_df, 100))
viz_heatmap(df_discretize(out_df2, 100))
viz_piramid(df_discretize(out_df, 100), df_discretize(out_df2, 100))
# Labels "0".."29" used as names for the frames in df_list.
names = []
for i in range(30):
    names.append(str(i))
viz_line(out_df_norm3, df_list, names)
# Bare expression: displays the frame when run as the last line of a notebook cell.
df_list[20]
viz_line_training(df_discretize(df_list[20],1000))